/*
****************************************************************************************************************************
Author: Juan S. Munoz-Morales
Year: 2021
Prepared for Paper: Some Children Left Behind? Variation in the Effects of an Educational Intervention
***************************************************************************************************************************
	This do-file estimates the Fréchet Hoffding Bounds and all its Components as in Djebbari & Smith (2008) and Buhl-Wiggers et al. (2021)
	
	Steps:
				1. Marginal distributions are collapsed into percentiles. 
				2. The percentile difference is equivalent to the Perfect Positive Dependence case. 
				3. The control marginal is inverted, and a second percentile difference is computed to estimate the perfect 
				   Negative Dependence Case. 
				4. The percente positive, impact standard deviation, and outcome correlation are also computed.   
				5. Standard errors are computed by J (1000) replications, by sampling with replacement and computing PPD and PND in the sample.    
				6. The command fhbounds is define with the syntax : 
				
fhbounds varlist(max=1) [if], Reps(integer) TREATment(varlist) CONtrol(varname) strata(varname) cluster(varname) [SAve(string) BIAScorrected]
				
	Information necessary for estimation:
		reps: Number of repetitions
		TREATment: varlist of categorical variables indicating the treatment(s)
		CONtrol: a categorical value that indicates the control group. 
				
	Options:
		strata: stratification variable to compute the boostrap standard errors
		cluster: cluster variable to compute the boostrap standard errors
		save: saver the boostrap replications using the file specified in parentheses.
		BIAScorrected: computes the bias-corrected standard errors

*/
***************************************************************************************************************************
cap program drop fhbounds
program define fhbounds
syntax varlist(max=1) [if], Reps(integer) TREATment(varlist) CONtrol(varname) [strata(varname) cluster(varname) SAve(string) BIAScorrected]
	clear matrix
	scalar drop _all	
	drop if `varlist'==.
	if "`if'"!="" {
		qui keep `if'
	}
	display "FH Bounds for " _N " observations"
	// Temporary save Data
		tempfile data
		qui save `data'
	// Compute Variances
		qui sum `varlist' if `control'==1
		scalar var_y0 = r(sd)^2
		display "Descriptive Stats by Treatment Arm"
		foreach var in `treatment' {		
			display "For `var':"
			sum `varlist' if `var'==1
			scalar var_y`var'_pt = r(sd)^2
		}
	// Collapse Distribution into Percentiles		
		pctile `control'_pt = `varlist' if `control'==1, nq(100)
		local treatment_pt ""
		local order ""
		foreach var in `treatment' {				
			pctile `var'_pt = `varlist' if `var'==1, nq(100)
			local treatment_pt `treatment_pt' `var'_pt
			local order `order' FH_up_`var' FH_lo_`var'
		}
	// Keep Percentiles and variables
		qui drop if `control'_pt==.
		keep `control'_pt `treatment_pt'
		gen p = _n		
	// Reverse Control order	
		preserve 
			keep `control'
			gsort -`control'
			gen p = _n
			ren `control' `control'_inv
			tempfile file_reverse
			qui save `file_reverse'
		restore	
		qui merge 1:1 p using `file_reverse', nogen 
	// FH Uppper Bound
		foreach t in `treatment_pt' {		
			// Bound
			gen FH_up_`t'  = `t' - `control'_pt
			// Percentage Positive
			gen ppos_up_`t'T = (FH_up_`t'>0)
			qui sum ppos_up_`t'
			scalar ppos_up_`t' = r(mean)
			// Outcome Correlation
			qui corr `control'_pt `t'
			scalar rho_up_`t' = r(rho)  
			// Impact Standard Deviation
			scalar impvar_up_`t' = sqrt(var_y0 + var_y`t' - (2*rho_up_`t'*sqrt(var_y0 * var_y`t')))
			// Matrices
			mat UB_`t' = ppos_up_`t' \ impvar_up_`t' \ rho_up_`t'
		}		
	// FH Lower Bound
		foreach t in `treatment_pt' {
			// Bound
			gen FH_lo_`t' = `t' - `control'_inv
			// Percentage Positive
			gen ppos_lo_`t' = (FH_lo_`t'>0)
			qui sum ppos_lo_`t'
			scalar ppos_lo_`t' = r(mean)
			// Outcome Correlation
			qui corr `control'_inv `t'
			scalar rho_lo_`t'= r(rho)
			// Impact Standard Deviation
			scalar impvar_lo_`t' = sqrt(var_y0 + var_y`t' - (2*rho_lo_`t' * sqrt(var_y0 * var_y`t')))
			// Matrices
			mat LB_`t' = ppos_lo_`t' \ impvar_lo_`t' \ rho_lo_`t'
		}		
		*qui keep if p==5 | p==25 | p==50 | p==75 | p==95  
		keep p FH_*
		format FH_* %9.3g
		gen i = 0
		mkmat i p `order', mat(impacts)		
	// 	BOOTSTRAP SE	
	local counter = 1
	set seed 67296786
	while `counter' <`reps' {
		use `data', clear 
		if "`strata'"!="" & "`cluster'"!="" {
			bsample, strata(`strata') cluster(`cluster')
		}
		else if "`strata'"!="" & "`cluster'"=="" {
			bsample, strata(`strata') 
		}
		else if "`strata'"=="" & "`cluster'"!="" {
			bsample, cluster(`cluster')
		}
		else {
			bsample
		}
		// Collapse Distribution into Percentiles		
		pctile `control'_pt = `varlist' if `control'==1, nq(100)
		foreach var in `treatment' {				
			pctile `var'_pt = `varlist' if `var'==1, nq(100)
		}
	// Keep Percentiles and variables
		qui drop if `control'_pt==.
		keep `control'_pt `treatment_pt'
		gen p = _n	
	// Reverse Control order	
		preserve 
			keep `control'
			gsort -`control'
			gen p = _n
			ren `control' `control'_inv
			tempfile file_reverse
			qui save `file_reverse'
		restore	
		qui merge 1:1 p using `file_reverse', nogen 
	// Compute Bounds
		foreach t in `treatment_pt' {		
			// Upper
				// Bounding Upper Distribution
				gen FH_up_`t' = `t' - `control'_pt
				// Percentage Positive
				gen ppos_up_`t'T = (FH_up_`t'>0)
				qui sum ppos_up_`t'
				scalar ppos_up_`t' = r(mean)
				// Outcome Correlation
				qui corr `control'_pt `t'
				scalar rho_up_`t' = r(rho)  
				// Impact Standard Deviation
				scalar impvar_up_`t' = sqrt(var_y0 + var_y`t' - (2*rho_up_`t'*sqrt(var_y0 * var_y`t')))
			
			// Lower 
				// Bounding Lower Distribution
				gen FH_lo_`t' = `t' - `control'_inv
				// Percentage Positive
				gen ppos_lo_`t' = (FH_lo_`t'>0)
				qui sum ppos_lo_`t'
				scalar ppos_lo_`t' = r(mean)
				// Outcome Correlation
				qui corr `control'_inv `t'
				scalar rho_lo_`t'= r(rho)
				// Impact Standard Deviation
				scalar impvar_lo_`t' = sqrt(var_y0 + var_y`t' - (2*rho_lo_`t' * sqrt(var_y0 * var_y`t')))

		}
	// Shape Data to append
		keep p FH_*
		gen i = `counter'
		qui reshape wide FH_up* FH_lo*, i(i) j(p)
		foreach t in `treatment_pt' {	
			foreach bound in up lo {
				gen ppos_`bound'_`t'   = ppos_`bound'_`t'
				gen rho_`bound'_`t'    = rho_`bound'_`t'
				gen impvar_`bound'_`t' = impvar_`bound'_`t'
			}
		}
		if `counter' == 1 {
			tempfile base 
			qui save `base'
		}
		else {
			tempfile se
			qui save `se'
			use `base', clear
			append using `se'
			qui save `base', replace
		}
	display "FH Bounds Bootstrap Replication: `counter'"	
	local counter = `counter' + 1	
	}
	// Save bootstrap replications
	if "`save'" !="" {	
		noisily save "`save'", replace
	}
	// Put together the matrices and create data set with BS standard errors
		qui {
		// Collapse Standard Deviations
		drop i
		ds
		local vars = r(varlist)
		qui collapse (sd) `vars' 	
		gen i = 1
		local sd_vars ""
		foreach var in `treatment' {
			local sd_vars `sd_vars' FH_up_`var'_pt FH_lo_`var'_pt
		}	
		// Create Matrices
			// S.E. Quantiles
			preserve
				keep i FH_*
				qui reshape long `sd_vars', i(i) j(p)		
				mkmat i p `order', mat(se)
			restore
			// S.E other estimates
			preserve
				drop FH_*
				local sd_vars_other ""
				foreach var in `treatment' {
					foreach bound in up lo {
						rename  ppos_`bound'_`var'  `var'_`bound'101
						rename  impvar_`bound'_`var' `var'_`bound'102
						rename  rho_`bound'_`var'    `var'_`bound'103
						local sd_vars_other `sd_vars_other' `var'_`bound'
					}
				}
				reshape long `sd_vars_other', i(i) j(p)		
				mkmat i p `sd_vars_other', mat(se_2)
			restore
			// Create Point Estimate Matrix
			mat a1 = J(3,1,0)
			mat a2 = 101\102\103
			mat other = a1, a2
			foreach var in `treatment' {
				mat other = other, UB_`var'_pt, LB_`var'_pt
			}	
		// Create Outcome
			mat full = impacts \ se \ other \ se_2
			mat list full
			clear
			svmat full, n(col)
			sort p i 	
		// 	Labels
			tostring p, gen(p_exp)
			replace p_exp = "" if i==1
			forvalue n=1/99 {
				replace p_exp = "`n'th"               if p_exp=="`n'" & i == 0
			}
				replace p_exp = "Fraction Positive"            if p_exp == "101" & i == 0
				replace p_exp = "Impact Standard Deviation"    if p_exp == "102" & i == 0
				replace p_exp = "Outcome Correlation"          if p_exp == "103" & i == 0

		}
		// Create string vars for exporting to table
		qui gen aux_1 = "(" if i==1
		qui gen aux_2 = ")" if i==1
		foreach var in `sd_vars' { 
			qui tostring `var', gen(`var'_st) force format(%9.3f)
			qui replace `var'_st = aux_1 + `var'_st + aux_2 if i==1
		}
		drop aux*
		foreach var in `treatment' {
				label var FH_up_`var'_pt_st    "`var' PPD"
				label var FH_lo_`var'_pt_st   "`var' PND"
			}			
		gen sortvar = _n
	// IN CASE THAT BIAS-CORRECTED CI IS SELECTED: I load the previous data sets and estimate the BIAS-corrected CI
	if "`biascorrected'" != "" {
		tempfile main 
		qui save `main'
		// TAKE POINT ESTIMATES AND RESHAPE THEM TO MERGE TO THE BS REPLICATIONS
		qui drop if i==1
		qui drop sortvar *st p_exp i
		gen i = 1
		
		// Reshape
		local reshape_var
		foreach var in `treatment' {
			local reshape_var `reshape_var' FH_up_`var'_pt FH_lo_`var'_pt
		}
		qui reshape wide `reshape_var', i(i) j(p)
		
		ren FH_*101 ppos_* 
		ren FH_*102 impvar_*
		ren FH_*103 rho_*
		ren * PE_*
		ren PE_i i 
		tempfile PE
		qui save `PE'
		// Load BS replications
		qui use `base', clear
		drop i
		qui ds
		local vars = r(varlist)		
		// Merge Point Estimates
		gen i = 1
		qui merge m:1 i using `PE', nogen 
		// Compute CI per variable 
		foreach var of local vars {
			// Bias corrected CI
			// Compute % Under the parameter. 
			qui gen P_under_PE = `var' < PE_`var'
			qui sum P_under_PE 
				if r(mean) == 0 { // There are values for which the mean=0 so we give the bs interval.
					_pctile `var', nq(1000)
					gen lo_`var' = r(r25)
					gen up_`var'  = r(r975)
				}
				else if r(mean) > 0 {
				// Compute the quantile in the bootstrap distribution including the shifter percentage [(normal(2*invnormal(r(mean))]. 
				// Multiply *1000 to put in quantiles that are readable by the _pctile function.
				// The string,real, and round functions are to ensure that the qunaitle is between 1 and 999
				local BC_lo = 1000*real(string(round(normal(2*invnormal(r(mean)) - invnormal(0.975)), 0.001), "%9.3f"))
				local BC_hi = 1000*real(string(round(normal(2*invnormal(r(mean)) + invnormal(0.975)), 0.001), "%9.3f"))
				// Some values are = 1 so I have to put them in the quantile 99.9. 
				if `BC_lo' == 1000 {
					local BC_lo = 999
				}
				if `BC_hi' == 1000 {
					local BC_hi = 999
				}
				// Return the quantiles	
				_pctile `var', nq(1000)
				// Define both limits of the bias-corrected CI. 
				qui gen lo_`var' = r(r`BC_lo')
				qui gen up_`var' = r(r`BC_hi')
			}
			drop P_under_PE 
		}
		// Collapse
		collapse (mean) lo* up*
		// Reshape
			// Rename
		local rs_vars ""	
		foreach var in `treatment' {
			foreach bound in up lo {
				foreach ci in up lo {
					rename  `ci'_ppos_`bound'_`var'     `ci'_FH_`bound'_`var'_pt101
					rename  `ci'_impvar_`bound'_`var'   `ci'_FH_`bound'_`var'_pt102
					rename  `ci'_rho_`bound'_`var'      `ci'_FH_`bound'_`var'_pt103
					local rs_vars `rs_vars' `ci'_FH_`bound'_`var'_pt
				}
			}
		}
		// Reshape
		gen i = 1
		qui reshape long `rs_vars', i(i) j(p)
		// Merge to original results
		qui merge 1:m i p using `main', nogen
		foreach var in `treatment' {
			foreach bound in up lo {
					qui gen CI_`bound'_`var' = "[" + string(lo_FH_`bound'_`var'_pt, "%9.3f") +","+ string(up_FH_`bound'_`var'_pt, "%9.3f")  +"]" if i == 1
					qui replace FH_`bound'_`var'_pt_st = CI_`bound'_`var' if i==1
					drop lo_FH_`bound'_`var'_pt up_FH_`bound'_`var'_pt CI_`bound'_`var'
			}
		}	
	sort sortvar	
	}
	foreach var in `treatment' {
		label var FH_up_`var'_pt_st    "`var' PPD"
		label var FH_lo_`var'_pt_st   "`var' PND"
	}	
end

	
